Here is the report of the network analysis of the crop health data.

library(dplyr)
library(plyr)
library(reshape)
library(reshape2)
library(gridExtra)
library(lubridate)
library(doBy)
library(cluster)
library(ggplot2)
library(scales)
library(bioDist)
library(vegan)
library(mvtnorm)
library(igraph)
library(WGCNA)
## ==========================================================================
## *
## *  Package WGCNA 1.47 loaded.
## *
## *    Important note: It appears that your system supports multi-threading,
## *    but it is not enabled within WGCNA in R. 
## *    To allow multi-threading within WGCNA with all available cores, use 
## *
## *          allowWGCNAThreads()
## *
## *    within R. Use disableWGCNAThreads() to disable threading if necessary.
## *    Alternatively, set the following environment variable on your system:
## *
## *          ALLOW_WGCNA_THREADS=<number_of_processors>
## *
## *    for example 
## *
## *          ALLOW_WGCNA_THREADS=4
## *
## *    To set the environment variable in linux bash shell, type 
## *
## *           export ALLOW_WGCNA_THREADS=4
## *
## *     before running R. Other operating systems or shells will
## *     have a similar command to achieve the same aim.
## *
## ==========================================================================
library(cowplot)
library(NMF)

Load the survey data

library(RCurl) # run this package for load the data form the website 

file <- getURL("https://docs.google.com/spreadsheets/d/1zB7gNdI7Nk7SuHuPWcjzaKnjuwkvL6sOVMo0zMfuV-c/pub?gid=558862364&single=true&output=csv") # load data from the google drive

data <- read.csv(text = file) # read data which is formated as the csv
data[data == "-"] <- NA # replace '-' with NA

data[data == ""] <- NA # replace 'missing data' with NA

#==== to lower variable names ====
names(data) <- tolower(names(data)) # for more consistancy

select out the column which is not inlcuded in the analysis

# remove the variables that are not included for analysis
data$phase <- NULL # there is only one type yype of phase in the survey
data$identifier <- NULL # this variable is not included in the analysis
data$village <- NULL # remove name of village
data$year <- NULL # remove year data
data$season <- NULL # remove season data
data$lat <- NULL # remove latitude data
data$long <- NULL # remove longitude data
data$fa <- NULL # field area is not include in the analysis
data$fn <- NULL # farmer name can not be included in this survey analysis
data$fp <- NULL # I do not know what is fp
data$lfm <- NULL # there is only one type of land form in this survey
data$ced <- NULL # Date data can not be included in the network analysis
data$cedjul <- NULL # remove crop establisment julian date data
data$hd <- NULL # Date data can not be included in the network analysis
data$hdjul <- NULL # remove harvest julian date
data$cvr <- NULL # reove crop varieties
data$varcoded <- NULL # I will recode them 
data$fymcoded <- NULL # remove code data of fym
data$mu <- NULL # no record of mullucicide data
data$nplsqm <- NULL # remove number of plant per square meter
data$rbpx <- NULL # no record of rice bug p
#==== corract the variable type =====
# reformat of the vaibales
data <- transform(data, 
                  country = as.factor(country),
                  pc = as.factor(pc),
                  cem = as.factor(cem),     
                  ast = as.factor(ast),       
                  ccd = as.numeric(ccd),
                  vartype = as.factor(vartype),
                  fym = as.character(fym),
                  n = as.numeric(n),
                  p = as.numeric(p) ,
                  k = as.numeric(k),
                  mf = as.numeric(mf),        
                  wcp = as.factor(wcp),      
                  iu = as.numeric(iu),     
                  hu = as.numeric(hu),      
                  fu = as.numeric(fu),      
                  cs  = as.factor(cs),      
                  ldg  =  as.numeric(ldg),  
                  yield = as.numeric(yield) ,
                  dscum = as.factor(dscum),   
                  wecum = as.factor(wecum),   
                  ntmax = as.numeric(ntmax), 
                  npmax = as.numeric(npmax),    
                  nltmax = as.numeric(nltmax),  
                  nlhmax = as.numeric(nltmax),  
                  waa = as.numeric(waa),      
                  wba = as.numeric(wba) ,   
                  dhx =  as.numeric(dhx),  
                  whx =  as.numeric(whx),     
                  ssx  = as.numeric(ssx),  
                  wma = as.numeric(wma), 
                  lfa = as.numeric(lfa),
                  lma = as.numeric(lma),   
                  rha  = as.numeric(rha) ,
                  thrx = as.numeric(thrx),    
                  pmx = as.numeric(pmx),    
                  defa  = as.numeric(defa) ,
                  bphx = as.numeric(bphx),   
                  wbpx = as.numeric(wbpx),    
                  awx  = as.numeric(awx), 
                  rbx =as.numeric(rbx),   
                  rbbx = as.numeric(rbbx),  
                  glhx  = as.numeric(glhx), 
                  stbx=as.numeric(stbx),    
                  hbx= as.numeric(hbx),
                  bbx = as.numeric(bbx),    
                  blba = as.numeric(blba),    
                  lba = as.numeric(lba),    
                  bsa = as.numeric(bsa),    
                  blsa = as.numeric(blsa),  
                  nbsa = as.numeric(nbsa),  
                  rsa  = as.numeric(rsa),   
                  lsa = as.numeric(lsa),    
                  shbx = as.numeric(shbx) ,  
                  shrx = as.numeric(shrx),    
                  srx= as.numeric(srx),    
                  fsmx = as.numeric(fsmx),   
                  nbx =  as.numeric(nbx),   
                  dpx = as.numeric(dpx),    
                  rtdx  = as.numeric(rtdx),  
                  rsdx  = as.numeric(rsdx),
                  gsdx  =as.numeric(gsdx),   
                  rtx = as.numeric(rtx)
) 

Now data are in the right format and ready to further analysis, but there are some variables needed to code as the number not character

Before proforming cluster analysis which is the further analysis, I need to code the character to number

recode the previous crop

if previosu crop data are rice, they will be coded as 1, but others, not rice, they will be coded as 0.

data$pc <- ifelse(data$pc == "rice", 1, 0)
recode the crop establisment mothods

Transplanting rice is 1, and direct seeded rice is 2.

#Crop establisment method
levels(data$cem)[levels(data$cem) == "trp"] <- 1
levels(data$cem)[levels(data$cem) == "TPR"] <- 1
levels(data$cem)[levels(data$cem) == "DSR"] <- 2
levels(data$cem)[levels(data$cem) == "dsr"] <- 2
fym

fym there are two type 0 and 1, raw data are recorded as no, yes, and value, if the value is 0 which mean 0 and if the value more than 0 which means 1

data$fym <- ifelse(data$fym == "no", 0, 
                   ifelse(data$fym == "0", 0, 1
                   )
)
Vartype

Vartype there are three type treditional varieties coded as 1, modern varities coded as 2 and hybrid coded as 3.

data$vartype <- ifelse(data$vartype == "tv", 1,
                       ifelse(data$vartype == "mv", 2,
                              ifelse(data$vartype == "hyb", 3, NA
                              )
                       )
)
Weed control practices

Weed management practices have three type, hand coded as 1, herb coded as 2, and herb-hand coded as 3.

# wcp weed control management
levels(data$wcp)[levels(data$wcp) == "hand"] <- 1

levels(data$wcp)[levels(data$wcp) == "herb"] <- 2

levels(data$wcp)[levels(data$wcp) == "herb-hand"] <- 3
Crop status

Crop status have five level, very poor coded as 1, poor coded as 2, average coded as 3, good coded as 4 and very good coded as 5.

# Crop Status
levels(data$cs)[levels(data$cs) == "very poor"] <- 1

levels(data$cs)[levels(data$cs) == "poor"] <- 2

levels(data$cs)[levels(data$cs) == "average"] <- 3

levels(data$cs)[levels(data$cs) == "good"] <- 4

levels(data$cs)[levels(data$cs) == "very good"] <- 5

Pre-process before performing cluster analysis

all data should be numeric data

#clean the data
num.data <- apply(data[, -c(1,2)], 2, as.numeric) # create dataframe to store the numerical transformation of raw data excluded fno and country

num.data <- as.data.frame(as.matrix(num.data)) # convert from vector to matrix

data <- cbind(data[ , c("fno", "country")], num.data)

data <- data[ , apply(data[, -c(1,2)], 2, var, na.rm = TRUE) != 0] # exclude the column with variation = 0

data <- data[complete.cases(data), ] # exclude row which cantain NA

Cluster analysis of the production sitation

Cluster analyses using a nearest neighbor and a chi-square distance (15) were performed in subsets representing each site, in order to determine site-specific patterns of cropping practices and injury profiles. Clusters including less than n = 3 fields were disregarded from further steps. Cluster analysis can be applied to categorized data, and in this case with a chi-square distance.

Cluster analysis allows groups of plots haring similar chacteristics to be identified. It was used here to define clusters of injury types in a way similar to that used in the analysis of surveys. One feature is that this technique, when applied to results from factorially arranged experiments, generate clusters where one injury type often tend to predominate. One result from cluster analysis is that inclusion of one individual to a particular cluster can be seen as a new, synthetic variable. Each plot, therefore, can then be described as one element of a given cluster of injury type, where often one injury predominates.

  1. subset productio situation data
start.PS <- "pc"

end.PS <- "fu"

start.col.PS <- match(start.PS, names(data))

end.col.PS <- match(end.PS, names(data))

PS.data <- data[, c(1, start.col.PS:end.col.PS)]
  1. clustering the variables related to production situation by Eclidean distanct
#distance matrix
dist.PS <- daisy(PS.data[-1]) # Dissimilarity Matrix Calculation from cluster library, "grower" method can handel the mix variables.
## Warning in daisy(PS.data[-1]): binary variable(s) 1, 2, 6 treated as
## interval scaled
cluster.PS <- hclust(dist.PS, method = "average")

dendro.PS <- as.dendrogram(cluster.PS)

plot(dendro.PS, center = T, nodePar = list(lab.cex = 0.6, lab.col = "black", pch = NA), main = "Dendrogram for Production situation")

# draw retangles
rect.hclust(tree = cluster.PS, k=2, border = c("red", "blue"))

Here is the heatmap and dendrogram showing that Nitrogen is the big influence to seperate the observation into two groups.

aheatmap(subset(PS.data, select = -c(fno, mf)), distfun = daisy, hclustfun = "average")
## Warning in distfun(mat): binary variable(s) 1, 2, 6 treated as interval
## scaled

We realized that the total number of observation of surveys are 420. If we have many clusters, It will affect of reliability of the results because the number of observations is small. So we decided to cluster into two groups.

#number of members in each cluster
PS.no <- cutree(cluster.PS, k = 2) #

# cophenitic correlation
rcluster.PS <- cophenetic(cluster.PS) # identify which observation drop in which groups

#cor(dist.PS, rcluster.PS) # Cophenetic correlation is an measure of how faitfully a dendrogram perserves the pairwise distance between the original unmodeled points
# It should be very close to 1 for a high-quality solution.

data <- cbind(data, PS.no) # add to this data into the raw data set

data$PS.no <- as.factor(data$PS.no) # define this value as the factor

Exploration of the profiles of production situtation in grop 1 and group 2

How many observation of each country ?
g <- ggplot(data, aes(x = country)) +
  
  geom_histogram(weights = count) +
  
  stat_bin(geom = "text", aes(label = ..count..), vjust = 1.5, color = "white" ) +
 
   scale_y_continuous(limits = c(0, 125), breaks = NULL ) +
  
  theme(legend.position = "none") +
  
  ggtitle("The no of observation of dataset in each country")

print(g)
## ymax not defined: adjusting position using y instead

  1. create the data subseted by country because we would rather to know the difference between cluster 1 and 2 in same country.
name.country <- c("IDN", "IND", "THA", "VNM", "PHL") #sort(as.vector(unique(groups)))

by.country.data <- list()

for(i in 1:length(name.country)){
  
  temp <- data %>% filter(country == name.country[i])
  
  by.country.data[[i]] <- temp
  
}

How many obervations in each group in each country?

par(mfrow=c(1, 2))
for(i in 1:length(name.country)) {
  
  g <- ggplot(by.country.data[[i]], aes(x = PS.no)) +
    
    geom_histogram(weights = count) +
    
    ggtitle(paste("Histogram PS cluster of", name.country[i])) +
    
    theme(legend.position="none")

  print(g)   

  }

  1. explore the distribution of each variable in differnt grups of production sitaion sepreated by country.
# The profile of PS no 
clus.PS.data <- data[, -c(1, 17:56)] # select only the production sitation data

m.PS.data <- melt(clus.PS.data)
## Using country, PS.no as id variables
name.variable <- levels(m.PS.data$variable)

name.PS.no <- levels(m.PS.data$PS.no)
# check the profile of each cluster by histogram 
layout(matrix(c(1:14), 14, 2, byrow = TRUE))
for(i in 1:length(name.variable)) {
  
  subtemp <- subset(m.PS.data, variable == name.variable[i])
 
   for(j in 1: length(name.PS.no)){
     subtemp1 <- subtemp %>% filter(PS.no == name.PS.no[j])
     
     g <-  ggplot(subtemp1, aes(x = value)) +
       
       geom_bar(aes(y = (..count..)/sum(..count..)), binwidth = 1) + 
       
       scale_y_continuous(limits = c(0, 1), labels = percent) + 
       
       ylab("Percent") + xlab(paste(name.variable[i])) +
       
       ggtitle(paste("Histogram of", name.variable[i], "in PS. no", name.PS.no[j]))

     print(g) # the normal rscript do not need to use print function for express graph but here is rknit 
     
   }
  
  }

#head(data)
# select only the variables related to the injury profiles

start.IP <- "dhx" # set to read the data from column named "dhx"

end.IP <- "rtx" # set to read the data from column named "rtx"

start.col.IP <- match(start.IP, names(data)) # match function for check which column of the data mactch the column named following the conditons above

end.col.IP <- match(end.IP, names(data)) # match function for check which column of the data mactch the column named following the conditons above

IP.data <- data[start.col.IP:end.col.IP] # select the columns of raw data which are following the condition above

#==== pre-process before run correlaiton function
# because the correlaiton measures will not allow to perform with variables whihc have not variance.

IP.data <- IP.data[ ,apply(IP.data, 2, var, na.rm = TRUE) != 0] # exclude the column (variables) with variation = 0

groups <- paste(data$country, data$PS.no, sep = "_") #combine two cloumn names country and PS

IP.data <- cbind(groups, IP.data)

IP.data[is.na(IP.data)] <- 0

trts <- as.vector(unique(IP.data$groups))

Co-occurence Analysis

The analysis presented in paper is designed to capture the co-occurence patterns with in the inju The analysis presented is designed to test for differences in co-occurrence patterns at the community level across ecosystems, identify modules of co-occurring microorganisms within communities, and identify pairwise co-occurrence patterns within modules that are consistent across ecosystems (summarized in Figure 1). We considered co-occurrence to be positive rank correlations (Spearman’s correlation) between pairs of microbes within each dataset with the strength of the relationship represented by the correlation coefficient (Figure 1B). Negative correlations (indica- tive of either competitive interactions or non-overlapping niches between microbes; Faust and Raes, 2012) were also included in this analysis though they were a small subset of our combined datasets. We only considered negative and positive co-occurrence relationships based on strength of correlation (i.e., ρ from the Spearman’s correlation) at values less than or equal to −0.75 and −0.5 or greater than 0.5 and 0.75.

Spearman’s correlation was used because it obnly check if two OTU are monotonically related, rather than having a linear relationship. As a result is less sensitive to difference in abundance, and thios was seesireable because abumndance information may have I should rtake nothe definition of the value that we collected in the surveys Co-occurence was assessed using a previosuly descripted methos ()

#=====co_occurrence_pairwise.R====
results <- matrix(nrow = 0, ncol = 7) # create results to store the outputs

options(warnings = -1) # setting not to show the massages as the warnings

for(a in 1:length(trts)){
  # subset the raw data by groups of countries and cluster of production situation
  trt.temp <- trts[a]
  #subset the dataset for those treatments
  temp <- subset(IP.data, groups == trt.temp)
  
  #in this case the community data started at column 6, so the loop for co-occurrence has to start at that point
  for(b in 2:(dim(temp)[2]-1)){
    #every species will be compared to every other species, so there has to be another loop that iterates down the rest of the columns
    for(c in (b+1):(dim(temp)[2])){
      
      #summing the abundances of species of the columns that will be compared
      species1.ab <- sum(temp[,b])
      
      species2.ab <- sum(temp[,c])
      #if the column is all 0's no co-occurrence will be performed
      if(species1.ab >1 & species2.ab >1){
        
        test <- cor.test(temp[,b], temp[,c], method = "spearman", na.action = na.rm, exact = FALSE)
        # There are warnings when setting exact = TRUE because of ties from the output of Spearman's correlation
        # stackoverflow.com/questions/10711395/spear-man-correlation and ties
        # It would be still valid if the data is not normally distributed.
       
         rho <- test$estimate
        
         p.value <- test$p.value
      }
      
      if(species1.ab <=1 | species2.ab <= 1){
       
         rho<-0
        
        p.value<-1
      } 
      
      new.row <- c(trts[a], names(temp)[b], names(temp)[c], rho, p.value, species1.ab, species2.ab)
      
      results <- rbind(results, new.row)            
      
    }
    
  }
 
   print(a/length(trts))
  
}
## [1] 0.1111111
## [1] 0.2222222
## [1] 0.3333333
## [1] 0.4444444
## [1] 0.5555556
## [1] 0.6666667
## [1] 0.7777778
## [1] 0.8888889
## [1] 1
results <- data.frame(data.matrix(results))

names(results) <- c("trt","taxa1","taxa2","rho","p.value","ab1","ab2")

#making sure certain variables are factors
results$trt <- as.factor(results$trt)
results$taxa1 <- as.character(as.factor(results$taxa1))
results$taxa2 <- as.character(as.factor(results$taxa2))
results$rho <- as.numeric(as.character(results$rho))
results$p.value <- as.numeric(as.character(results$p.value))
results$ab1 <- as.numeric(as.character(results$ab1))
results$ab2 <- as.numeric(as.character(results$ab2))

head(results)
##     trt taxa1 taxa2         rho      p.value  ab1  ab2
## 1 PHL_1   dhx   whx  0.06524012 0.6891879742 1307 2089
## 2 PHL_1   dhx   ssx -0.12272044 0.4506097948 1307  109
## 3 PHL_1   dhx  defa  0.18989475 0.2405423584 1307 1941
## 4 PHL_1   dhx  bphx -0.11699678 0.4721633481 1307 1286
## 5 PHL_1   dhx  wbpx -0.50743562 0.0008316712 1307   84
## 6 PHL_1   dhx   awx  0.00000000 1.0000000000 1307    1
  results_sub <- subset(results, as.numeric(as.character(abs(rho))) > 0.25)

  results_sub.by.group <- list()

  name.groups <- name.groups <- c("IDN_1", "IDN_2", "IND_1", "IND_2", "THA_1", "THA_2", "VNM_1", "VNM_2", "PHL_1") #sort(as.vector(unique(groups)))

Network analysis

  for(i in 1: length(name.groups)){
  
  results_sub.by.group[[i]] <- subset(results_sub, trt == name.groups[i])
  }
# head(results_sub.by.group[[1]][,2:3]) # get the list
#layout(matrix(c(1:14), 9, 2, byrow = TRUE))
  par(mfrow = c(1,2))
  g  <- list()

for(i in 1:length(name.groups)){
  
  g[[i]] <- graph.edgelist(as.matrix(results_sub.by.group[[i]][, 2:3]), directed = FALSE)
#== adjust layout

  l <- layout.circle(g[[i]])
#== adjust vertices
  V(g[[i]])$color <- "tomato"
  V(g[[i]])$frame.color <- "gray40"
  V(g[[i]])$shape <- "circle"
  V(g[[i]])$size <- 25
  V(g[[i]])$label.color <- "white"
  V(g[[i]])$label.font <- 2
  V(g[[i]])$label.family <- "Helvetica"
  V(g[[i]])$label.cex <- 0.7

#== adjust the edge
  E(g[[i]])$weight <- as.matrix(results_sub.by.group[[i]][, 4])
  
  E(g[[i]])$width <- 1 + E(g[[i]])$weight*5
  
  col <- c("firebrick3", "forestgreen")
  
  colc <- cut(results_sub.by.group[[i]]$rho, breaks = c(-1, 0, 1), include.lowest = TRUE)
  
  E(g[[i]])$color <- col[colc]

#== plot network model
  plot(g[[i]], layout = l * 1.0, main = paste( "network model of", name.groups[i]))
}

  network.value <- list()

for(i in 1:length(g)){

  E(g[[i]])$weight <- as.matrix(abs(results_sub.by.group[[i]][, 4]))

  network.value[[i]] <- data.frame(
  id = V(g[[i]])$name, 
  deg = degree(g[[i]]), # degree
  bet = betweenness(g[[i]]), # betweenness
  clo = closeness(g[[i]]), # closeness
  eig = evcent(g[[i]])$vector,# egin.cent
  cor = graph.coreness(g[[i]]), # coreness
  tra = transitivity(g[[i]], type = c("local")) # cluster coefficients
)

  network.value[[i]]$res <- as.vector(lm(eig ~ bet, data = network.value[[i]])$residuals)
  network.value[[i]]$group <- name.groups[i]
}

  net.vertice.data <- as.data.frame(do.call("rbind", network.value))
  row.names(net.vertice.data) <- NULL
  net.vertice.data$id <- as.factor(net.vertice.data$id)
  net.vertice.data$group <- as.factor(net.vertice.data$group)
  m.net.vertice.data <- melt(net.vertice.data)
## Using id, group as id variables
# compare degree

  m.net.vertice.data %>% filter(variable == "deg") %>% 
   ggplot(aes(x= value, y = id)) + 
        geom_point(size = 3, color ="blue") +
   facet_wrap(~group) +
        theme_bw() +
        theme(panel.grid.major.x =  element_blank(),
              panel.grid.minor.x =  element_blank(),
              panel.grid.major.y = element_line(color = "grey", linetype = 3)) +
  ggtitle("Degree of each node in the injury profile network")

  m.net.vertice.data %>% filter(variable == "bet") %>% 
   ggplot(aes(x= value, y = id)) + 
        geom_point(size = 3, color ="blue") +
   facet_wrap(~group) +
        theme_bw() +
        theme(panel.grid.major.x =  element_blank(),
              panel.grid.minor.x =  element_blank(),
              panel.grid.major.y = element_line(color = "grey", linetype = 3)) +
   ggtitle("Betweenness of each node in the injury profile network")

 m.net.vertice.data %>% filter(variable == "bet") %>% 
   ggplot(aes(x= value, y = id)) + 
        geom_point(size = 3, color ="blue") +
   facet_wrap(~group) +
        theme_bw() +
        theme(panel.grid.major.x =  element_blank(),
              panel.grid.minor.x =  element_blank(),
              panel.grid.major.y = element_line(color = "grey", linetype = 3)) +
   ggtitle("Betweenness of each node in the injury profile network")

m.net.vertice.data %>% filter(variable == "clo") %>% 
   ggplot(aes(x= value, y = id)) + 
        geom_point(size = 3, color ="blue") +
   facet_wrap(~group) +
        theme_bw() +
        theme(panel.grid.major.x =  element_blank(),
              panel.grid.minor.x =  element_blank(),
              panel.grid.major.y = element_line(color = "grey", linetype = 3)) +
   ggtitle("Closeness of each node in the injury profile network")

m.net.vertice.data %>% filter(variable == "eig") %>% 
   ggplot(aes(x= value, y = reorder(id, value))) + 
        geom_point(size = 3, color ="blue") +
   facet_wrap(~group) +
        theme_bw() +
        theme(
              panel.grid.major.x =  element_blank(),
              panel.grid.minor.x =  element_blank(),
              panel.grid.major.y = element_line(color = "grey", linetype = 3)
              ) +
   ggtitle("Eigenvector of each node in the injury profile network")

m.net.vertice.data %>% filter(variable == "cor") %>% 
   ggplot(aes(x= value, y = reorder(id, value))) + 
        geom_point(size = 3, color ="blue") +
   facet_wrap(~group) +
        theme_bw() +
        theme(
              panel.grid.major.x =  element_blank(),
              panel.grid.minor.x =  element_blank(),
              panel.grid.major.y = element_line(color = "grey", linetype = 3)
              ) +
   ggtitle("Eigenvector of each node in the injury profile network")

One method is to plot / regress eigenvector centrality on betweenness and examine the residue.

  • A variable or node with high betweenness and low eigenvector centrality may be an importanct gatkeeper to a central actor

  • A variable or node with low betweeness and hight eigenvector centrality may have unique access to central actor

layout(matrix(c(1:9), 5, 2, byrow = TRUE))
## Warning in matrix(c(1:9), 5, 2, byrow = TRUE): data length [9] is not a
## sub-multiple or multiple of the number of rows [5]
for(i in 1:length(network.value)){
 plot <- ggplot(
  network.value[[i]], aes(x = bet, y = eig,
               label = rownames(network.value[[i]]),
               color = res,
               size = abs(res))) +
    xlab("Betweenness Centrality") +
    ylab("Eigencvector Centrality") +
    geom_text() +
    ggtitle(paste("Key Actor Analysis for Injuiry Profiles of", name.groups[i]))

print(plot) 
}

# dd <- degree.distribution(g[[1]], cumulative = T, mode = "all")
# plot(dd, pch = 19, cex = 1, col = "orange", xlab = "Degree", ylab = "Cumulative Frequesncy")
# distances(g[[1]])
# #shortest_paths(g[[1]], 5)
# all_shortest_paths(g[[1]], 1, 6:8)
# mean_distance(g[[1]])
# hist(degree(g[[1]]),  col = "lightblue", xlim = c(0, 10), xlab = "Vertex Degree", ylab = "Frequency", main = "")
# hist(graph.strength(g[[1]]),  col = "pink", xlim = c(0, 5), xlab = "Vertex strength", ylab = "Frequency", main = "")

#global.prop <- NULL
#i <-1
#for(i in 1:length(g)){
#adj.mat <- as.matrix(get.adjacency(g[[i]], attr = "weight"))
#new.global.prop <- as.data.frame(fundamentalNetworkConcepts(adj.mat))
#names(new.global.prop) <- name.groups[i]
#global.prop <- c(global.prop, new.global.prop)
#net.result <- fundamentalNetworkConcepts(mat[[i]])
#conformityBasedNetworkConcepts(mat)
#}
#sum.global <- as.data.frame(do.call("cbind", global.prop))
#rownames(sum.global) <- c("Density", "Centralization", "Heterogeneity", "Mean ClusterCoef", "Mean Connectivity")
# cleate all the possible pair of the variables
# IP.list <- NULL
#   for(i in 2:(dim(IP.data)[2]-1)){
#      for(j in (i+1):(dim(IP.data)[2])){
#       new.row <- c(names(IP.data)[i], names(IP.data)[2 + j])
#       IP.list <- rbind(IP.list, new.row)          
#     }
#   }
# 
# IP.list <- as.data.frame(IP.list)
# names(IP.list) <- c("var1", "var2")
# IP.list$rho <- 0
# results$newrho <- ifelse(abs(results$rho) > 0.25, results$rho, 0)
#  
# IDN1 <- results %>% filter(trt == "IDN_1")
#  IDN2 <- results %>% filter(trt == "IDN_2")
#  IND1 <- results %>% filter(trt == "IND_1")
#  IND2 <- results %>% filter(trt == "IND_2")
#  THA1 <- results %>% filter(trt == "THA_1")
#  THA2 <- results %>% filter(trt == "THA_2")
#  VNM1 <- results %>% filter(trt == "VNM_1")
#  VNM2 <- results %>% filter(trt == "VNM_1")
#  PHL1 <- results %>% filter(trt == "PHL_1")